In [130]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as gg
from scipy import stats, integrate
sns.set(color_codes=True)
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12,8)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
from plotly import __version__
import cufflinks as cf
from sklearn.preprocessing import scale
In [131]:
# Load the three yearly World Happiness Report tables (2015-2017) in one go,
# followed by the multi-year modelling table that the later cells call `raw`.
hp15, hp16, hp17 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./world-happiness-report/2015.csv",
        "./world-happiness-report/2016.csv",
        "./world-happiness-report/2017.csv",
    )
)
raw = pd.read_csv("final_model_data.csv")
In [132]:
# Key every yearly report by country name so the frames can be aligned on it.
hp15, hp16, hp17 = (
    report.set_index('Country') for report in (hp15, hp16, hp17)
)

# The 2017 report is renamed from dotted column names to space-separated ones
# (e.g. 'Happiness Score', which later cells read from all three frames), and
# its Region column is taken from the 2016 report through an index join.
hp17 = (
    hp17
    .rename(columns={
        'Whisker.high': 'Upper Confidence Interval',
        'Whisker.low': 'Lower Confidence Interval',
        'Happiness.Score': 'Happiness Score',
        'Economy..GDP.per.Capita.': 'Economy (GDP per Capita)',
        'Health..Life.Expectancy.': 'Health (Life Expectancy)',
        'Trust..Government.Corruption.': 'Trust (Government Corruption)',
        'Dystopia.Residual': 'Dystopia Residual',
    })
    .join(hp16['Region'])
)

# Expose 'Life Ladder' under the name 'Happiness Score' as well.
raw['Happiness Score'] = raw['Life Ladder']
In [138]:
# 2016 slice of the modelling table, keyed by country.
data16 = raw.loc[raw['year'] == 2016].set_index('country')

# Per-indicator series for 2016, each with its own missing values removed.
gdp16, social16, healthy, freedom16, corruption16 = (
    data16[column].dropna()
    for column in (
        'Log GDP per capita',
        'Social support',
        'Healthy life expectancy at birth',
        'Freedom to make life choices',
        'Perceptions of corruption',
    )
)

# The same idea over every year in `raw` (each series drops only its own NaNs,
# so the series may differ in length).
hpall, gdpall, socialall, freedomall, healthyall, corruptionall, generosityall = (
    raw[column].dropna()
    for column in (
        'Happiness Score',
        'Log GDP per capita',
        'Social support',
        'Freedom to make life choices',
        'Healthy life expectancy at birth',
        'Perceptions of corruption',
        'Generosity',
    )
)
In [139]:
# Keep only the 2016 rows that have no missing value in any column.
data16_new = data16.dropna()
data16_new_hp = data16_new['Happiness Score']

# Pass every column through `scale`, then put the unscaled happiness score
# back in place of its scaled version and order the rows from the lowest
# score to the highest.
data = (
    pd.DataFrame(
        scale(data16_new),
        index=data16_new.index,
        columns=data16_new.columns,
    )
    .assign(**{'Happiness Score': data16_new_hp})
    .sort_values(by=['Happiness Score'], ascending=True)
)
In [142]:
sns.set(rc={'figure.figsize':(15,15)})

# One distribution panel per indicator, filling a 4x2 grid in reading order
# (the eighth slot stays empty).
pooled_series = (
    hpall, gdpall, socialall, freedomall,
    healthyall, corruptionall, generosityall,
)
for slot, series in enumerate(pooled_series, start=1):
    plt.subplot(4, 2, slot)
    last_panel = sns.distplot(series)

# The Axes of the final panel remains the value of the cell.
last_panel
Out[142]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c24e987b8>
  • Happiness scores are concentrated mainly between 3.5 and 7.5. In almost no country do people report being extremely unhappy, or being 100% satisfied.
In [56]:
# Switch cufflinks to offline mode, then draw a 'spread' chart of the
# happiness score together with GDP and social support from `data`.
cf.go_offline()
data.loc[:, ['Happiness Score', 'Log GDP per capita', 'Social support']].iplot(
    kind='spread'
)
In [57]:
# Switch cufflinks to offline mode, then draw a 'spread' chart of the
# happiness score together with freedom of life choices from `data`.
cf.go_offline()
data.loc[:, ['Happiness Score', 'Freedom to make life choices']].iplot(
    kind='spread'
)
In [58]:
def drawworld(df, year):
    """Display an interactive world choropleth of happiness scores.

    Parameters
    ----------
    df : DataFrame
        Its index is passed to plotly as the map locations (matched with
        ``locationmode='country names'``) and as the hover text; its
        'Happiness Score' column supplies the colour value of each country.
    year : any
        Shown in the figure title after conversion with ``str()``.

    Returns
    -------
    None
        The figure is rendered in the notebook through ``iplot``.
    """
    # Named `trace` rather than `data` so it is not confused with the
    # notebook-level DataFrame called `data`.
    trace = dict(
        type = 'choropleth',
        locations = df.index,
        locationmode = 'country names',
        # NOTE(review): the last stop used to read "rgb(, 0, 200)", which is
        # not a valid colour. The missing red channel is assumed to be 0 --
        # confirm the intended value.
        colorscale = [[0,"rgb(200, 50, 0)"],[0.85,"rgb(40,0, 190)"],[0.9,"rgb(70, 0, 245)"],
                      [0.94,"rgb(0, 0, 10)"],[0.97,"rgb(106, 0, 247)"],[1,"rgb(0, 0, 200)"]],
        z = df['Happiness Score'],
        text = df.index,
        colorbar = {'title':'Happiness Score'})
    # plotly documents the projection type in lowercase ('mercator').
    layout = dict(title = 'World Happiness Score in '+str(year),
                  geo = dict(showframe = False, projection = {'type': 'mercator'}))
    choromap3 = go.Figure(data = [trace], layout=layout)
    iplot(choromap3)
In [121]:
# World map of the happiness scores in the 2016 report.
drawworld(df=hp16, year=2016)
In [60]:
# World map of the happiness scores in the 2017 report.
drawworld(df=hp17, year=2017)
In [61]:
# Tag each yearly report with its year (as a string) so the year can be used
# as the hue once the three reports are stacked.
for year_label, report in (('2015', hp15), ('2016', hp16), ('2017', hp17)):
    report['Year'] = year_label

# Stack the three reports, keeping only the columns needed for the plot.
hp151617 = pd.concat([
    report[['Happiness Score', 'Region', 'Year']]
    for report in (hp15, hp16, hp17)
])

# One box per (region, year) on a single large canvas.
sns.set(font_scale=2)
fig, axes = plt.subplots(figsize=(20, 14))
sns.boxplot(data=hp151617, x='Happiness Score', y='Region', hue='Year')
Out[61]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c0d03f2b0>
  • Australia and New Zealand, Western Europe, and North America have high happiness scores.
  • Sub-Saharan Africa and Southern Asia have low happiness scores.
  • For most regions, the happiness score decreases from year to year.
In [62]:
# Pairwise view of the life-ladder score and five indicators; rows with a
# missing value in any of the selected columns are dropped first.
raw_cor = raw[[
    'Life Ladder',
    'Log GDP per capita',
    'Social support',
    'Healthy life expectancy at birth',
    'Freedom to make life choices',
    'Perceptions of corruption',
]].dropna()

sns.set(rc={'figure.figsize':(15,15)})
# Regression fits in the off-diagonal panels, KDE curves on the diagonal.
sns.pairplot(raw_cor, kind="reg", diag_kind="kde")
Out[62]:
<seaborn.axisgrid.PairGrid at 0x1c0d0373c8>
In [63]:
# Re-key `raw` by country (renamed to 'Country') and attach the Region column
# of the 2015 report through an index join.
raw = (
    raw
    .rename(columns={'country': 'Country'})
    .set_index('Country')
    .join(hp15['Region'])
)
In [64]:
# Preview the first five rows of `raw`.
raw.head(n=5)
Out[64]:
year Life Ladder Log GDP per capita Social support Healthy life expectancy at birth Freedom to make life choices Generosity Perceptions of corruption Happiness Score Region
Country
Afghanistan 2008 3.723590 7.197130 0.450662 47.550438 0.718114 0.183062 0.881686 3.723590 Southern Asia
Afghanistan 2009 4.401778 7.362664 0.552308 47.859673 0.678896 0.204633 0.850035 4.401778 Southern Asia
Afghanistan 2010 4.758381 7.416260 0.539075 48.159512 0.600127 0.138529 0.706766 4.758381 Southern Asia
Afghanistan 2011 3.831719 7.445761 0.521104 48.451160 0.495901 0.176108 0.731109 3.831719 Southern Asia
Afghanistan 2012 3.782938 7.549241 0.520637 48.738346 0.530935 0.247713 0.775620 3.782938 Southern Asia
In [109]:
# Canvas size and font scale are configured once, before any panel is drawn.
sns.set(rc={'figure.figsize':(30,30)}, font_scale=3)

# (column of `raw`, panel title) for each cell of the 3x2 grid, in reading order.
region_panels = [
    ('Happiness Score', "Happiness Score By Region"),
    ('Log GDP per capita', "Log GDP per capita By Region"),
    ('Social support', "Social support By Region"),
    ('Healthy life expectancy at birth', "Healthy life expectancy at birth By Region"),
    ('Freedom to make life choices', "Freedom By Region"),
    ('Perceptions of corruption', "Corruption By Region"),
]

for position, (column, panel_title) in enumerate(region_panels, start=1):
    ax = plt.subplot(3, 2, position)
    # Horizontal boxplot: one box per region for this indicator.
    sns.boxplot(x = column, y = 'Region', data = raw, ax = ax)
    ax.set(title = panel_title, xlabel = "", ylabel = "")
    ax.tick_params(labelsize=16)
    # Rotate the x tick labels through tick_params. Re-assigning
    # ax.get_xticklabels() on a numeric axis would freeze whatever label text
    # exists before the figure is drawn (possibly empty strings) as fixed labels.
    ax.tick_params(axis='x', labelrotation=90)
In [119]:
# Reset seaborn to its default theme (including the default font scale).
# No figure.figsize override is set here: per the seaborn documentation a
# FacetGrid sizes its own figure, so the former (200, 200) setting did not
# affect this plot and only remained as the default for every later figure.
sns.set()

# One bar chart of log GDP per capita by year for each region, three per row.
# Note that `ax` is bound to a FacetGrid here, not to a matplotlib Axes.
ax = sns.factorplot(
    x="year",
    y="Log GDP per capita",
    col='Region',
    col_wrap=3,
    data=raw,
    kind="bar",
)
ax.set_xticklabels(rotation = 90)
Out[119]:
<seaborn.axisgrid.FacetGrid at 0x1c2ba74630>